library(tidyverse)
library(tidytext)
library(here)
library(lexiconPT)
library(plotly)
theme_set(theme_bw())
# Load the rated complaints table; here() builds the path from the project root.
reclamacoes <- here("data/3-avaliacao-humana/reclamacoes-avaliadas-20190515.csv") %>%
  read_csv()
Parsed with column specification:
cols(
  id = col_double(),
  orgao = col_character(),
  data = col_character(),
  titulo = col_character(),
  texto = col_character(),
  link = col_character(),
  `Grupo que vai avaliar` = col_double(),
  insatisfacao = col_double(),
  avaliadores = col_double(),
  range.avaliacoes = col_double()
)
# Disabled code, kept for reference. It derived `reclamacoes` from a
# `reclamacoes_raw` table (not defined in the active code): `orgao` was taken
# from the 5th "/"-separated piece of `link`, rows were filtered by agency slug,
# and each row received a sequential `id` plus a group in 1..6 (`id %% 6 + 1`).
# NOTE(review): the inner `#` before "anac-..." would swallow the rest of that
# line (closing parentheses and pipe) if this block were simply uncommented.
#reclamacoes = reclamacoes_raw %>% 
#    mutate(
#        nome_orgao_site = orgao,
#        orgao = str_split(link, "/") %>% map_chr(~ .[[5]])
#    ) %>% 
#    filter(orgao %in% c("inss-ministerio-da-previdencia-social", #"anac-agencia-nacional-de-aviacao-civil")) %>% 
#    mutate(id = 1:n(), 
#           grupo_avaliando = id %% 6 + 1) 

O processo de estimativa será baseado, em grande parte, em https://sillasgonzaga.github.io/2017-09-23-sensacionalista-pt01/ .

# Bring both sentiment lexicons into the session in a single data() call and
# bind them to the short aliases used by the rest of the analysis.
data(list = c("oplexicon_v3.0", "sentiLex_lem_PT02"))
op30 <- oplexicon_v3.0
sent <- sentiLex_lem_PT02

# Inspect the structure of the OpLexicon table.
op30 %>% glimpse()
Observations: 32,191
Variables: 4
$ term              <chr> "=[", "=@", "=p", "=P", "=x", "=d", "=D", ";)", ";)", ";@", ";*", ";**", ";~", ";D", ";D", ";p",…
$ type              <chr> "emot", "emot", "emot", "emot", "emot", "emot", "emot", "emot", "emot", "emot", "emot", "emot", …
$ polarity          <int> -1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1…
$ polarity_revision <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "…

Precisamos de um dataframe em que cada observação é uma palavra.

# Split every complaint text into one row per word (`termo`), keeping the
# complaint `id` so the words can be aggregated back per complaint later.
palavra_a_palavra <- reclamacoes %>%
  select(id, texto) %>%
  unnest_tokens(termo, texto)

# Preview the first tokens.
palavra_a_palavra %>%
  select(id, termo) %>%
  head(20)

# A lexicon may list the same term more than once (the glimpse of `op30` shows
# ";)" and ";D" repeated). Joining against the raw tables would duplicate every
# matching word, inflating both the per-complaint word counts and the polarity
# sums. Collapse each lexicon to a single row per term first, using the mean
# polarity of its entries (a term whose entries are all NA yields NaN, which
# is.na() and sum(na.rm = TRUE) treat as missing).
op30_por_termo <- op30 %>%
  group_by(term) %>%
  summarise(op30 = mean(polarity, na.rm = TRUE))

sent_por_termo <- sent %>%
  group_by(term) %>%
  summarise(sent = mean(polarity, na.rm = TRUE))

# Attach both polarities to every token; words absent from a lexicon get NA.
# The result keeps exactly one row per token of `palavra_a_palavra`.
palavras_com_sentimento <- palavra_a_palavra %>%
  left_join(op30_por_termo, by = c("termo" = "term")) %>%
  left_join(sent_por_termo, by = c("termo" = "term"))

Agora, de fato, calculamos a polaridade acumulada (via somatório) de cada reclamação e salvamos o resultado em um CSV.

# One row per complaint `id`:
#   sentimento_*  - summed polarity with the sign flipped, so text dominated by
#                   negative words receives a higher score;
#   palavras_*    - number of words that had a polarity in that lexicon;
#   palavras      - total number of word rows for the complaint.
sentimentos <- summarise(
  group_by(palavras_com_sentimento, id),
  sentimento_op30 = -1 * sum(op30, na.rm = TRUE),
  palavras_op30 = sum(!is.na(op30)),
  sentimento_sent = -1 * sum(sent, na.rm = TRUE),
  palavras_sent = sum(!is.na(sent)),
  palavras = n()
)

# Persist the per-complaint scores.
write_csv(sentimentos, here("data/5-sentimentos/sentimento.csv"))
# Linearly rescale a numeric vector so its minimum maps to 1 and its maximum
# to 5 (min-max normalisation followed by `* 4 + 1`).
# NOTE(review): if every value is identical the range is zero and the result
# is NaN, as in the original formula.
rescale_1_to_5 <- function(v) {
  lo <- min(v, na.rm = TRUE)
  hi <- max(v, na.rm = TRUE)
  (v - lo) / (hi - lo) * 4 + 1
}

# Align the scores with `reclamacoes` by complaint id instead of by row
# position: `sentimentos` is ordered by `id` and has no row for a complaint
# that produced no tokens, so positional assignment could attach a score to the
# wrong complaint or fail on a length mismatch. Complaints without a row in
# `sentimentos` get NA. Columns are referenced by name rather than by index so
# a change in column order cannot silently pick the wrong variable.
row_in_sentimentos <- match(reclamacoes$id, sentimentos$id)

reclamacoes <- reclamacoes %>%
  mutate(
    sentimentos_op = rescale_1_to_5(sentimentos$sentimento_op30)[row_in_sentimentos],
    sentimentos_sent = rescale_1_to_5(sentimentos$sentimento_sent)[row_in_sentimentos]
  )
#linearModOp <- lm(insatisfacao ~ sentimentos_op, data=reclamacoes) 
#linearModSent <- lm(insatisfacao ~ sentimentos_sent, data=reclamacoes)
#cor_op <- cor(reclamacoes$sentimentos_op, reclamacoes$insatisfacao)
#cor_sent <- cor(reclamacoes$sentimentos_sent, reclamacoes$insatisfacao)
# Scatter plot of `insatisfacao` (x) against the rescaled OpLexicon score (y).
# `mode` is given explicitly: without it plotly falls back to markers anyway
# but prints a "No scatter mode specifed" message on every render.
p <- plot_ly(
  reclamacoes,
  x = ~insatisfacao,
  y = ~sentimentos_op,
  type = "scatter",
  mode = "markers"
)
p
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

# Scatter plot of `insatisfacao` (x) against the rescaled SentiLex score (y).
# `mode` is given explicitly: without it plotly falls back to markers anyway
# but prints a "No scatter mode specifed" message on every render.
q <- plot_ly(
  reclamacoes,
  x = ~insatisfacao,
  y = ~sentimentos_sent,
  type = "scatter",
  mode = "markers"
)
q
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
LS0tCnRpdGxlOiAiQW5hbGlzYSBzZW50aW1lbnRvcyBkYXMgcmVjbGFtYWNvZXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KGhlcmUpCmxpYnJhcnkobGV4aWNvblBUKQpsaWJyYXJ5KHBsb3RseSkKdGhlbWVfc2V0KHRoZW1lX2J3KCkpCmBgYAoKYGBge3IgY2FycmVnYX0KcmVjbGFtYWNvZXMgPSByZWFkX2NzdihoZXJlKCJkYXRhLzMtYXZhbGlhY2FvLWh1bWFuYS9yZWNsYW1hY29lcy1hdmFsaWFkYXMtMjAxOTA1MTUuY3N2IikpCgojcmVjbGFtYWNvZXMgPSByZWNsYW1hY29lc19yYXcgJT4lIAojICAgIG11dGF0ZSgKIyAgICAgICAgbm9tZV9vcmdhb19zaXRlID0gb3JnYW8sCiMgICAgICAgIG9yZ2FvID0gc3RyX3NwbGl0KGxpbmssICIvIikgJT4lIG1hcF9jaHIofiAuW1s1XV0pCiMgICAgKSAlPiUgCiMgICAgZmlsdGVyKG9yZ2FvICVpbiUgYygiaW5zcy1taW5pc3RlcmlvLWRhLXByZXZpZGVuY2lhLXNvY2lhbCIsICMiYW5hYy1hZ2VuY2lhLW5hY2lvbmFsLWRlLWF2aWFjYW8tY2l2aWwiKSkgJT4lIAojICAgIG11dGF0ZShpZCA9IDE6bigpLCAKIyAgICAgICAgICAgZ3J1cG9fYXZhbGlhbmRvID0gaWQgJSUgNiArIDEpIApgYGAKCk8gcHJvY2Vzc28gZGUgZXN0aW1hdGl2YSBzZXJhIG11aXRvIGJhc2VhZG8gZW0gaHR0cHM6Ly9zaWxsYXNnb256YWdhLmdpdGh1Yi5pby8yMDE3LTA5LTIzLXNlbnNhY2lvbmFsaXN0YS1wdDAxLyAuIAoKYGBge3IgcHJlcGFyYV9zZW50aW1lbnRvfQpkYXRhKCJvcGxleGljb25fdjMuMCIpCmRhdGEoInNlbnRpTGV4X2xlbV9QVDAyIikKCm9wMzAgPC0gb3BsZXhpY29uX3YzLjAKc2VudCA8LSBzZW50aUxleF9sZW1fUFQwMgoKZ2xpbXBzZShvcDMwKQpgYGAKClByZWNpc2Ftb3MgZGUgdW0gZGF0YWZyYW1lIG9uZGUgY2FkYSBvYnNlcnZhY2FvIGVoIHVtYSBwYWxhdnJhLiAKCmBgYHtyIHNlcGFyYX0KcGFsYXZyYV9hX3BhbGF2cmEgPSByZWNsYW1hY29lcyAlPiUgCiAgICBzZWxlY3QoaWQsIHRleHRvKSAlPiUgCiAgICB1bm5lc3RfdG9rZW5zKHRlcm1vLCB0ZXh0bykKCnBhbGF2cmFfYV9wYWxhdnJhICU+JQogIHNlbGVjdChpZCwgdGVybW8pICU+JQogIGhlYWQoMjApCgpwYWxhdnJhc19jb21fc2VudGltZW50byA9IHBhbGF2cmFfYV9wYWxhdnJhICU+JSAKICBsZWZ0X2pvaW4ob3AzMCAlPiUgc2VsZWN0KHRlcm0sIG9wMzAgPSBwb2xhcml0eSksIGJ5ID0gYygidGVybW8iID0gInRlcm0iKSkgJT4lIAogIGxlZnRfam9pbihzZW50ICU+JSBzZWxlY3QodGVybSwgc2VudCA9IHBvbGFyaXR5KSwgYnkgPSBjKCJ0ZXJtbyIgPSAidGVybSIpKSAKYGBgCgpBZ29yYSwgZGUgZmF0bywgY2FsY3VsYW1vcyBxdWFsIGEgcG9sYXJpZGFkZSBhY3VtdWxhZGEgKHZpYSBzb21hdG9yaW8pIGRlIGNhZGEgcmVjbGFtYWNhbyBlIHNhbHZhbW9zIGVtIHVtIGNzdi4KCmBgYHtyIGNhbGN1bGFf
c2VudGltZW50b3N9CnNlbnRpbWVudG9zID0gcGFsYXZyYXNfY29tX3NlbnRpbWVudG8gJT4lIAogICAgZ3JvdXBfYnkoaWQpICU+JQogICAgc3VtbWFyaXNlKHNlbnRpbWVudG9fb3AzMCA9IHN1bShvcDMwLCBuYS5ybSA9IFRSVUUpICotMSwKICAgICAgICAgICAgICBwYWxhdnJhc19vcDMwID0gc3VtKCFpcy5uYShvcDMwKSksCiAgICAgICAgICAgICAgc2VudGltZW50b19zZW50ID0gc3VtKHNlbnQsIG5hLnJtID0gVFJVRSkgKi0xLCAKICAgICAgICAgICAgICBwYWxhdnJhc19zZW50ID0gc3VtKCFpcy5uYShzZW50KSksIAogICAgICAgICAgICAgIHBhbGF2cmFzID0gbigpKQoKc2VudGltZW50b3MgJT4lIAogICAgd3JpdGVfY3N2KGhlcmUoImRhdGEvNS1zZW50aW1lbnRvcy9zZW50aW1lbnRvLmNzdiIpKQoKeCA8LSBzZW50aW1lbnRvc1syXQpub3JtYWxpemVkX29wID0gKCh4LW1pbih4KSkvKG1heCh4KS1taW4oeCkpICogNCkgKyAxCgp5IDwtIHNlbnRpbWVudG9zWzRdCm5vcm1hbGl6ZWRfc2VudCA9ICgoeS1taW4oeSkpLyhtYXgoeSktbWluKHkpKSAqIDQpICsgMSAKCnJlY2xhbWFjb2VzIDwtIHJlY2xhbWFjb2VzICU+JSBtdXRhdGUoc2VudGltZW50b3Nfb3AgPSBub3JtYWxpemVkX29wJHNlbnRpbWVudG9fb3AzMCkKcmVjbGFtYWNvZXMgPC0gcmVjbGFtYWNvZXMgJT4lIG11dGF0ZShzZW50aW1lbnRvc19zZW50ID0gbm9ybWFsaXplZF9zZW50JHNlbnRpbWVudG9fc2VudCkKCiNsaW5lYXJNb2RPcCA8LSBsbShpbnNhdGlzZmFjYW8gfiBzZW50aW1lbnRvc19vcCwgZGF0YT1yZWNsYW1hY29lcykgCiNsaW5lYXJNb2RTZW50IDwtIGxtKGluc2F0aXNmYWNhbyB+IHNlbnRpbWVudG9zX3NlbnQsIGRhdGE9cmVjbGFtYWNvZXMpCgojY29yX29wIDwtIGNvcihyZWNsYW1hY29lcyRzZW50aW1lbnRvc19vcCwgcmVjbGFtYWNvZXMkaW5zYXRpc2ZhY2FvKQojY29yX3NlbnQgPC0gY29yKHJlY2xhbWFjb2VzJHNlbnRpbWVudG9zX3NlbnQsIHJlY2xhbWFjb2VzJGluc2F0aXNmYWNhbykKCnAgPC0gcGxvdF9seShyZWNsYW1hY29lcywgeCA9IH5pbnNhdGlzZmFjYW8sIHkgPSB+c2VudGltZW50b3Nfb3AsIHR5cGUgPSAnc2NhdHRlcicpCnAKCnEgPC0gcGxvdF9seShyZWNsYW1hY29lcywgeCA9IH5pbnNhdGlzZmFjYW8sIHkgPSB+c2VudGltZW50b3Nfc2VudCwgdHlwZSA9ICdzY2F0dGVyJykKcQpgYGAKCg==